import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns
import pandas as pd
import plotly
import plotly.graph_objects as go
import plotly.express as px
from plotly.offline import plot, iplot, init_notebook_mode
# Load steam.csv (resolved relative to the working directory) into the
# DataFrame that every later chunk reads and mutates.
df = pd.read_csv(filepath_or_buffer='steam.csv')
This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# Preview the first five rows of the dataset.
df.head(n=5)
## appid name ... owners price
## 0 10 Counter-Strike ... 10000000-20000000 7.19
## 1 20 Team Fortress Classic ... 5000000-10000000 3.99
## 2 30 Day of Defeat ... 5000000-10000000 3.99
## 3 40 Deathmatch Classic ... 5000000-10000000 3.99
## 4 50 Half-Life: Opposing Force ... 5000000-10000000 3.99
##
## [5 rows x 18 columns]
# Draw a single random value from the appid column (no seed, so the result
# differs between runs).
df['appid'].sample(n=1)
## 21998 850780
## Name: appid, dtype: int64
# Count missing values per column.
df.isna().sum()
## appid 0
## name 0
## release_date 0
## english 0
## developer 0
## publisher 0
## platforms 0
## required_age 0
## categories 0
## genres 0
## steamspy_tags 0
## achievements 0
## positive_ratings 0
## negative_ratings 0
## average_playtime 0
## median_playtime 0
## owners 0
## price 0
## dtype: int64
# Another unseeded one-row random draw from the appid column.
df.loc[:, 'appid'].sample(n=1)
## 24558 936020
## Name: appid, dtype: int64
# Remove the appid column from df in place.
df.drop('appid', axis=1, inplace=True)
# List the distinct values stored in the english flag column.
pd.unique(df['english'])
## array([1, 0], dtype=int64)
# Distinct values of the english flag, in order of first appearance.
df['english'].drop_duplicates().to_numpy()
## array([1, 0], dtype=int64)
# Replace the 0/1 english flag with readable labels. Series.map turns any
# value that is not a key of map_dict into NaN, so running this a second time
# on the already-mapped column would blank it out.
map_dict = {
    0: 'non-English',
    1: 'English',
}
english_labels = df['english'].map(map_dict)
df['english'] = english_labels
# Pie chart with the number of games per english label.
# Count once and reuse the result for both the labels and the values.
english_counts = df['english'].value_counts()
fig = go.Figure(
    data=[
        go.Pie(
            labels=english_counts.index,
            values=english_counts.values,
        )
    ]
)
# Show absolute counts on the slices and outline each slice in black.
fig.update_traces(
    textinfo='value',
    textfont_size=20,
    marker=dict(
        colors=['salmon', 'lightblue'],
        line=dict(color='#000000', width=2),
    ),
)
# Axis titles are intentionally omitted: a pie trace is not plotted against
# x/y axes, so xaxis_title / yaxis_title have nothing to label here.
fig.update_layout(
    height=600,
    width=600,
    title_text='English and not English pie chart',
    title_x=0.5,  # centre the title horizontally
    font=dict(
        family="Courier New, monospace",
        size=18,
        color="black",
    ),
    # Horizontal legend anchored below the plot area, right-aligned.
    legend=dict(
        orientation="h",
        yanchor="bottom",
        y=-0.2,
        xanchor="right",
        x=1,
    ),
)
fig.show()
import plotly.express as px
# Percentage share of games per english label.
# Plotly Express works on tidy frames addressed by column name, so build a
# small two-column frame (label, count) instead of passing the full df next
# to unrelated arrays.
english_share = (
    df['english']
    .value_counts()
    .rename_axis('language')
    .reset_index(name='games')
)
fig = px.pie(
    english_share,
    values='games',
    names='language',
    title='Jogos com idioma ingles',
)
# fig.show() is not called here; update_traces returns the figure, so it is
# the value of the chunk's last expression.
fig.update_traces(textinfo='percent+label')
Temos que 98,1% dos games possuem idioma inglês e apenas 1,89% não possuem este idioma.
# value_counts orders brackets from most to least frequent, so the last five
# entries are the five rarest ownership brackets.
df['owners'].value_counts().iloc[-5:]
## 5000000-10000000 46
## 10000000-20000000 21
## 20000000-50000000 3
## 50000000-100000000 2
## 100000000-200000000 1
## Name: owners, dtype: int64
Apenas 1 dos jogos possui mais de cem milhões de proprietários (jogadores).
# Games that fall in the three ownership brackets selected below.
top_brackets = [
    '20000000-50000000',
    '50000000-100000000',
    '100000000-200000000',
]
top_6_owners = df[df['owners'].isin(top_brackets)]
# 'owners' holds range strings, so sorting the column directly is
# lexicographic ('100000000-...' < '20000000-...' < '50000000-...').
# Rank by the numeric lower bound instead, largest bracket first. A stable
# sort on the negated bound keeps tied rows in their original order.
lower_bound = top_6_owners['owners'].str.split('-').str[0].astype('int64')
order = (-lower_bound).to_numpy().argsort(kind='mergesort')
top_6_owners = top_6_owners.iloc[order]
print(top_6_owners)
## name ... price
## 22 Dota 2 ... 0.00
## 25 Counter-Strike: Global Offensive ... 0.00
## 12836 PLAYERUNKNOWN'S BATTLEGROUNDS ... 26.99
## 19 Team Fortress 2 ... 0.00
## 1634 Warframe ... 0.00
## 3362 Unturned ... 0.00
##
## [6 rows x 17 columns]
Perceba que Dota 2 é o jogo mais adquirido ou jogado na Steam; um dos motivos pode ser seu preço (grátis).
# Convert the release_date column to pandas datetimes, then preview the
# first five converted values.
df['release_date'] = df['release_date'].pipe(pd.to_datetime)
df['release_date'].head(n=5)
## 0 2000-11-01
## 1 1999-04-01
## 2 2003-05-01
## 3 2001-06-01
## 4 1999-11-01
## Name: release_date, dtype: datetime64[ns]
# Count free (price == 0) versus non-free games from one boolean mask
# instead of materialising two filtered copies of df.
is_free = df['price'] == 0
free, not_free = int(is_free.sum()), int((~is_free).sum())
labels = ['free', 'not free']
# Plotly Express works on tidy frames addressed by column name, so hand it a
# two-row frame (label, count) rather than the full df plus loose lists.
price_share = pd.DataFrame({'label': labels, 'games': [free, not_free]})
fig = px.pie(
    price_share,
    values='games',
    names='label',
    title='relação de jogos gratuitos',
)
# update_traces returns the figure, so it is the value of the chunk's last
# expression.
fig.update_traces(textinfo='percent+label')
Apenas 9,46% dos games disponíveis são gratuitos.
import plotly.express as px
# Flag each game whose categories string contains 'multi-player'
# (case-insensitive match).
df['multiplayer'] = [
    'multi-player' in cats.lower() for cats in df['categories']
]
# NOTE(review): drop() without inplace=True or re-assignment returns a new
# frame, so df itself still contains 'categories' after this line. Confirm
# whether the column was meant to be removed for good.
df.drop(columns=['categories'])
## name release_date ... price multiplayer
## 0 Counter-Strike 2000-11-01 ... 7.19 True
## 1 Team Fortress Classic 1999-04-01 ... 3.99 True
## 2 Day of Defeat 2003-05-01 ... 3.99 True
## 3 Deathmatch Classic 2001-06-01 ... 3.99 True
## 4 Half-Life: Opposing Force 1999-11-01 ... 3.99 True
## ... ... ... ... ... ...
## 27070 Room of Pandora 2019-04-24 ... 2.09 False
## 27071 Cyber Gun 2019-04-23 ... 1.69 False
## 27072 Super Star Blast 2019-04-24 ... 3.99 True
## 27073 New Yankee 7: Deer Hunters 2019-04-17 ... 5.19 False
## 27074 Rune Lord 2019-04-24 ... 5.19 False
##
## [27075 rows x 17 columns]
# Bar chart: number of games without / with multiplayer support.
# value_counts orders its result by frequency, not by value, so pin the order
# to [False, True] to guarantee each count is paired with the right label
# (fill_value=0 covers a class that does not occur at all).
multiplayer_counts = (
    df['multiplayer'].value_counts().reindex([False, True], fill_value=0)
)
valor = multiplayer_counts.values
fig = px.bar(
    x=['No-multiplayer', 'Multiplayer'],
    y=valor,
    title='Multiplayer vs. non-multiplayer games',
)
fig.show()
Note that the echo = FALSE parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.